rsubmit;

/* Delisting events from the CRSP monthly event file, keyed by permno/year/month. */
data delist;
    set crsp.mse (keep = permno date event dlret dlstcd);
    where event = "DELIST" and dlret < 10 and 1962 < year(date) < 2021;
    year  = year(date);
    month = month(date);
    drop date;
run;

proc sort data=delist;
    by permno year month;
run;

/* Monthly stock file: calendar keys, absolute price, SIC code and market equity. */
data crsp;
    set crsp.msf (keep = permno cusip shrout hsiccd date ret prc);
    where 1962 < year(date) < 2021;
    year  = year(date);
    month = month(date);
    quar  = qtr(date);
    price = abs(prc);          /* abs() because prc may be stored with a negative sign */
    sic   = hsiccd;
    me    = abs(prc) * shrout;
run;

proc sort data=crsp;
    by permno year month;
run;

/* Attach the delisting fields to the monthly returns. */
data crsp;
    merge crsp delist;
    by permno year month;
    drop date prc shrout;
run;

proc means data=crsp;
run;

/* Exchange code, share code and SIC code from the event file, by permno/year/month. */
data exch;
    set crsp.mse (keep = date permno exchcd shrcd siccd);
    year  = year(date);
    month = month(date);
    drop date;
run;

proc sort data=exch;
    by permno year month;
run;

data crsp;
    merge crsp exch;
    by permno year month;
run;

/* Carry the last known exchcd/shrcd/siccd forward within a permno and keep one
   row per permno/year/month (the last one).
   The body runs twice per observation on purpose: every LAG call site has its
   own queue, so the second pass pushes the already-filled value into the queue
   and the next observation can pick it up. Do not reorder these statements. */
data temp.crsp;
    set crsp;
    by permno year month;
    i = 0;
    do while (i < 2);
        lexchcd = lag(exchcd);
        lsiccd  = lag(siccd);
        lshrcd  = lag(shrcd);
        if permno = lag(permno) and exchcd = . then exchcd = lexchcd;
        if permno = lag(permno) and shrcd = . then shrcd = lshrcd;
        if permno = lag(permno) and siccd = . then siccd = lsiccd;
        i + 1;
    end;
    if last.month then output;
    drop i lexchcd lshrcd lsiccd;
run;

proc sort data=temp.crsp nodupkey;
    by permno year month;
run;

/* Delisting-return adjustment and final return in percent.
   For delisting codes 500-599 with a missing or zero delisting return:
   -0.55 when exchcd is 0, 3 or 33, otherwise -0.30. */
data temp.crsp;
    set temp.crsp;
    if (missing(dlret) or dlret = 0) and 500 <= dlstcd < 600
       and exchcd not in (0, 3, 33) then dlret = -0.3;
    if (missing(dlret) or dlret = 0) and 500 <= dlstcd < 600
       and exchcd in (0, 3, 33) then dlret = -0.55;

    /* Use the delisting return only when the regular return is missing. */
    if missing(ret) and not missing(dlret) then return = dlret * 100;
    else return = ret * 100;

    /* Keep share codes 10 and 11 only. */
    if shrcd not in (10, 11) then delete;
    drop event ret dlret dlstcd shrcd hsiccd;
run;

proc means data=temp.crsp nolabels;
run;

rsubmit;

/* Flag each CRSP firm-month as consumption (cons=1) and/or investment (inv=1)
   from its SIC code. Flags are left missing when no rule matches.

   Changes relative to the previous version:
     - the range written as 1000<=sic<200 could never be true and is now
       1000<=sic<2000 (NOTE(review): upper bound inferred - confirm it against
       the classification table this list was taken from);
     - the "sic ne 3221" exclusion is parenthesised so that it applies to the
       3200-3230 range (AND binds tighter than OR, so it used to apply only
       to the IN list, where 3221 does not appear);
     - a missing sic no longer satisfies "sic < 1000" (missing compares lower
       than any number) and is therefore left unclassified;
     - two statements that repeated earlier ones verbatim were removed.

   NOTE(review): 3713/3715 (listed as inv, also inside 3700<=sic<3720 cons)
   and 3643/3644 (listed as inv, also inside 3630<=sic<3660 cons) still
   receive both flags - confirm which one is intended. */
data crsp;
    set temp.crsp;

    /* ---- SIC 0001-1999 ---- */
    if not missing(sic) and sic < 1000
       and sic not in (100, 116, 139, 173, 780)
       and (130 <= sic <= 133) = 0 then cons = 1;
    if sic = 1490 then cons = 1;
    if 1000 <= sic < 2000 and sic ne 1490
       and sic not in (1020, 1450, 1470)
       and (1200 <= sic <= 1240) = 0 then inv = 1;

    /* ---- SIC 2000-2999 ---- */
    if sic in (2295, 2299, 2400, 2420, 2421, 2426, 2429, 2430, 2431, 2434, 2435, 2436, 2450, 2451, 2452,
               2491, 2493, 2520, 2521, 2522, 2530, 2540, 2541, 2542, 2590, 2599, 2610, 2819, 2893, 2895,
               2950, 2951, 2952) then inv = 1;
    if sic in (2499, 2500, 2510, 2511, 2512, 2514, 2515, 2517, 2519, 2591, 2730, 2731)
       or (2000 <= sic < 2100 and sic not in (2044, 2070, 2074, 2075, 2076, 2077, 2083))
       or sic in (2100, 2110, 2120, 2130)
       or (2200 <= sic < 2400 and sic not in (2290, 2295, 2296, 2297, 2299))
       or sic in (2490, 2499) then cons = 1;
    if sic in (2295, 2299) then inv = 1;
    if 2400 <= sic < 2500
       and sic not in (2410, 2439, 2440, 2441, 2448, 2449, 2490, 2499) then inv = 1;
    if 2520 <= sic < 2545 or sic = 2590 or sic = 2599 then inv = 1;
    if 2500 <= sic < 2520 or sic = 2591 then cons = 1;
    if sic = 2610 then inv = 1;
    if 2600 <= sic < 2700 and sic not in (2610, 2620, 2630, 2675, 2677) then cons = 1;
    if sic in (2800, 2830, 2840, 2841, 2842, 2844, 2861, 2870, 2879, 2890, 2891, 2899) then cons = 1;
    if sic in (2700, 2710, 2720, 2730, 2731, 2740, 2770, 2780, 2782) then cons = 1;
    if sic in (2819, 2893, 2895) then inv = 1;
    if sic in (2950, 2951, 2952) then inv = 1;
    if sic in (2900, 2910, 2990, 2992) then cons = 1;

    /* ---- SIC 3000-3999 ---- */
    if sic in (3000, 3010, 3020, 3050, 3052, 3053, 3080) then cons = 1;
    if (3200 <= sic <= 3230
        or sic in (3260, 3262, 3263, 3269, 3280, 3290, 3291, 3295, 3299))
       and sic ne 3221 then cons = 1;
    if sic = 3221
       or (3240 <= sic < 3260 and sic ne 3250 and sic ne 3255)
       or sic in (3261, 3264, 3292, 3296, 3297)
       or 3270 <= sic < 3280 then inv = 1;
    if 3100 <= sic < 3200 and sic ne 3110 and sic ne 3130 then cons = 1;
    if 3300 <= sic <= 3355 or sic = 3357 then inv = 1;
    if sic in (3400, 3420, 3421, 3423, 3429, 3430, 3433, 3466, 3469, 3484, 3493, 3495, 3496) then cons = 1;
    if sic in (3425, 3432, 3440, 3442, 3443, 3444, 3449, 3470, 3471, 3479, 3490, 3491, 3492, 3494,
               3498, 3499) then inv = 1;
    if 3500 <= sic < 3599
       and sic not in (3519, 3533, 3562, 3566, 3568, 3590, 3592) then inv = 1;
    if 3610 <= sic < 3630 or sic = 3643 or sic = 3644
       or 3660 <= sic < 3670 or sic = 3699 then inv = 1;
    if sic = 3600 or 3630 <= sic < 3660 or 3690 <= sic < 3699 then cons = 1;
    if sic in (3713, 3715, 3740, 3799) then inv = 1;
    if 3700 <= sic < 3720 or sic in (3732, 3750, 3792) then cons = 1;
    if sic in (3850, 3870, 3842) then cons = 1;
    if sic = 3800
       or (3820 <= sic < 3850 and sic ne 3822 and sic ne 3842)
       or sic = 3860 then inv = 1;
    if sic = 3993 then inv = 1;
    if 3910 <= sic <= 3952 or 3960 <= sic <= 3991 or sic = 3999 then cons = 1;

    /* ---- SIC 4000 and above ---- */
    if sic = 8710 then inv = 1;
    if 4000 <= sic < 4400 or sic = 4500 or 4100 <= sic < 4730
       or 4740 <= sic <= 4780 or sic = 4789 or 4800 <= sic < 4999 then cons = 1;
    if 7000 <= sic < 7310 or 7320 <= sic < 7350 or sic = 7380 or 7383 <= sic < 7999 then cons = 1;
    if 8000 <= sic < 8700 or sic = 8733 then cons = 1;
run;

proc means data=crsp nolabels;
run;

rsubmit;

/* Lagged market equity (the value weight), reset whenever the cusip changes.
   Both LAG calls run on every row, before the year filter removes any. */
data crsp;
    set crsp;
    lme = lag(me);
    if cusip ne lag(cusip) then lme = .;
    if year < 1964 or year > 2020 then delete;
run;

proc sort data=crsp;
    by year month;
run;

/* Monthly mean return of the consumption sample: value- and equal-weighted. */
proc means data=crsp noprint;
    where cons = 1;
    by year month;
    var return;
    weight lme;
    output out=consvw mean(return) = consvw;
run;

proc means data=crsp noprint;
    where cons = 1;
    by year month;
    var return;
    output out=consew mean(return) = consew;
run;

/* Same two averages for the investment sample. */
proc means data=crsp noprint;
    where inv = 1;
    by year month;
    var return;
    weight lme;
    output out=invvw mean(return) = invvw;
run;

proc means data=crsp noprint;
    where inv = 1;
    by year month;
    var return;
    output out=invew mean(return) = invew;
run;

/* Investment-minus-consumption spread; only the two spreads are kept. */
data temp.imc;
    merge consvw consew invvw invew;
    by year month;
    imcvw = invvw - consvw;
    imcew = invew - consew;
    drop _freq_ _type_ invvw consvw invew consew;
run;

proc means data=temp.imc;
run;

rsubmit;

/* Annual Compustat sample: Tobin's q (tq), a price-earnings style ratio (pe)
   and the inputs of the investment rate. SICH 4900-4999 and 6000-6999 are
   excluded. Fiscal years ending after June are assigned to the next year. */
data compann;
    set comp.funda (keep = gvkey cusip datadate sich
        ppegt capx csho dltt invt txdb pstkrv ib xint prcc_f datafmt indfmt popsrc consol);
    where indfmt = 'INDL' and datafmt = 'STD' and popsrc = 'D' and consol = 'C';
    if (4900 <= sich < 5000 or 6000 <= sich < 7000) then delete;

    year  = year(datadate);
    month = month(datadate);
    if month > 6 then year = year + 1;
    if year < 1961 or year > 2017 then delete;

    cusip = substr(cusip, 1, 8);

    /* Missing deferred taxes and interest expense are treated as zero. */
    if nmiss(txdb) then txdb = 0;
    if nmiss(xint) then xint = 0;

    tq = (csho*prcc_f + dltt + pstkrv - txdb - invt) / ppegt;
    pe = (csho*prcc_f + dltt + pstkrv - txdb) / (ib + xint);

    /* Missing compares lower than zero, so this also drops rows with missing tq. */
    if tq <= 0 then delete;
    keep gvkey cusip year tq pe ppegt capx;
run;

proc sort data=compann nodupkey;
    by gvkey year;
run;

/* Investment rate ik = capx / lagged ppegt, and its growth rate dik.

   Every LAG call is executed on every row, BEFORE the delete below. The
   previous version called lag(ik) after the delete; because the first row of
   each firm always has a missing ik and is deleted, that queue fell out of step
   with lag(gvkey) and the first surviving row of a firm computed dik from
   another firm's ik.

   dik is now defined only when the immediately preceding row belongs to the
   same gvkey and has a positive ik; otherwise it is missing.
   NOTE(review): consecutive rows are not checked for being consecutive years. */
data temp.compannk;
    set compann;
    lgvkey = lag(gvkey);
    lppe   = lag(ppegt);

    ik = capx / lppe;
    if gvkey ne lgvkey then ik = .;

    lik = lag(ik);
    if gvkey = lgvkey and lik > 0 then dik = (ik - lik) / lik;
    else dik = .;

    /* Drops non-positive and missing investment rates. */
    if ik <= 0 then delete;
    drop lgvkey lppe ppegt capx lik;
run;

proc means data=temp.compannk n mean p10 p25 p50 p75 p90;
run;

rsubmit;

/* Quarterly Compustat sample. Each record is shifted forward by one quarter
   (quar+1, rolling into the next year after Q4). */
data compann;
    set comp.fundq (keep = gvkey cusip datadate
        ppegtq capxy cshoq prccq dlttq txditcq pstkq invtq
        ceqq atq ibq dpq datafmt indfmt popsrc consol);
    where indfmt = 'INDL' and datafmt = 'STD' and popsrc = 'D' and consol = 'C';

    /* NOTE(review): sic is not among the columns kept from comp.fundq, so it is
       missing here and this industry filter never removes a row. Bring in an
       industry code if utilities and financials are meant to be excluded. */
    if (4900 <= sic < 5000 or 6000 <= sic < 7000) then delete;

    year = year(datadate);
    quar = qtr(datadate);
    quar = quar + 1;
    if quar > 4 then year = year + 1;
    if quar > 4 then quar = quar - 4;
    if year < 1980 or year > 2017 then delete;

    cusip = substr(cusip, 1, 8);

    /* Zero-fill the deferred-tax item that tq actually uses. The previous
       version zero-filled txdb, which is not read from the quarterly file. */
    if nmiss(txditcq) then txditcq = 0;

    bta  = log(ceqq / atq);
    cf   = ibq + dpq;
    tq   = (cshoq*prccq + dlttq + pstkq - txditcq - invtq) / ppegtq;
    logk = log(ppegtq);
    keep gvkey quar cusip year ppegtq capxy tq cf bta logk;
run;

proc sort data=compann nodupkey;
    by gvkey year quar;
run;

/* Investment rate (ik), cash flow over capital (cfk) and one-row lags of the
   regressors used later.

   Every LAG call is executed on every row, BEFORE the delete below. The
   previous version deleted rows first; because the first row of each firm
   always has a missing ik and is deleted, the later LAG queues fell out of step
   with lag(gvkey) and the first surviving row of a firm received another
   firm's lagged values.

   dik is defined only when the preceding row of the same gvkey has a positive ik.
   NOTE(review): consecutive rows are not checked for being consecutive quarters. */
data temp.ik;
    set compann;
    lgvkey = lag(gvkey);
    lppe   = lag(ppegtq);

    ik  = capxy / lppe;
    cfk = cf / lppe;
    if gvkey ne lgvkey then do;
        ik  = .;
        cfk = .;
    end;

    lbta = lag(bta);
    ltq  = lag(tq);
    lcfk = lag(cfk);
    lagk = lag(logk);
    lik  = lag(ik);
    if gvkey ne lgvkey then call missing(lbta, ltq, lcfk, lagk, lik);

    if lik > 0 then dik = (ik - lik) / lik;
    else dik = .;

    /* Drops non-positive and missing investment rates. */
    if ik <= 0 then delete;
    drop lgvkey lppe ppegtq capxy lik logk cfk cf tq;
run;

proc means data=temp.ik n mean p10 p25 p50 p75 p90;
run;

rsubmit;

/* Log monthly IMC returns so they can be summed within a calendar quarter. */
data imc;
    set temp.imc;
    lvw  = log(1 + imcvw/100);
    lew  = log(1 + imcew/100);
    quar = floor((month - 0.01)/3) + 1;
run;

proc means data=imc noprint;
    by year quar;
    var lvw lew;
    output out=imcq sum(lvw lew) = lvw lew;
run;

/* Back to percent returns, then move every quarter forward by two quarters. */
data temp.imcquar;
    set imcq;
    imcqvw = (exp(lvw) - 1) * 100;
    imcqew = (exp(lew) - 1) * 100;
    drop _freq_ _type_ lvw lew;
    quar = quar + 2;
    if quar > 4 then do;
        year = year + 1;
        quar = quar - 4;
    end;
run;

proc sort data=temp.imcquar;
    by year quar;
run;

proc sort data=temp.ik;
    by year quar;
run;

/* Attach quarterly IMC to the firm panel (firm rows only) and build a running
   quarter index: yearq = 1 for 1980Q1. */
data temp.ik;
    merge temp.ik (in=in1) temp.imcquar;
    by year quar;
    if in1;
    yearq = (year - 1980)*4 + quar;
run;

proc sort data=temp.ik;
    by gvkey yearq;
run;

proc means data=temp.ik;
run;

rsubmit;
options nosource nonotes errors=0;
%let begyear=25;
%let endyear=152;

/* For every quarter index from &begyear to &endyear: take the firms present in
   that quarter, regress ik on value-weighted IMC and the lagged controls over
   the preceding 20 quarters, firm by firm, and store the IMC coefficient as
   invsens in temp.ivol<index>. The parameter st is not used.
   NOTE(review): the equal-weighted pass later in this file regresses dik
   rather than ik - confirm the difference is intentional. */
%macro makebeta(st);
    %do myear = %eval(&begyear) %to %eval(&endyear);

        /* Firms observed in the target quarter. */
        data perms;
            set temp.ik (keep=gvkey yearq);
            if yearq eq %eval(&myear);
        run;

        proc sort data=perms nodupkey;
            by gvkey;
        run;

        /* Estimation window: the 20 quarters before the target quarter. */
        data sub;
            set temp.ik;
            if yearq ge %eval(&myear-20) and yearq le %eval(&myear-1);
        run;

        proc sort data=sub;
            by gvkey;
        run;

        proc sql;
            create table regdata as
                select sub.*
                from perms, work.sub
                where perms.gvkey = sub.gvkey;
        quit;

        proc sort data=regdata;
            by gvkey;
        run;

        proc reg data=regdata outest=temp.ivol&myear noprint;
            model ik = imcqvw lbta ltq lcfk lagk / edf;
            by gvkey;
        run;

        /* Discard fits where _edf_ + _p_ is below 8. */
        data temp.ivol&myear;
            set temp.ivol&myear;
            if _edf_ + _p_ < 8 then delete;
            yearq   = %eval(&myear);
            invsens = imcqvw;
            keep gvkey yearq invsens;
        run;

    %end;
%mend;
%makebeta(1);

rsubmit;
options source notes errors=5;
%let begyear=25;
%let endyear=152;

/* Concatenate temp.ivol&begyear ... temp.ivol&endyear into temp.invsensvw.
   The loop only generates the dataset names for the SET statement; the
   parameter st is not used. */
%macro makebeta(st);
    data temp.invsensvw;
        set
        %do myear = %eval(&begyear) %to %eval(&endyear);
            temp.ivol&myear
        %end;
        ;
%mend;
%makebeta(1);

/* Turn the running quarter index back into calendar year and quarter. */
data temp.invsensvw;
    set temp.invsensvw;
    year = floor((yearq - 0.01)/4) + 1980;
    quar = yearq - (year - 1980)*4;
    drop _type_ _freq_ yearq;
run;

proc sort data=temp.invsensvw;
    by gvkey year quar;
run;

proc means data=temp.invsensvw;
run;

rsubmit;
options nosource nonotes errors=0;
%let begyear=25;
%let endyear=152;

/* For every quarter index from &begyear to &endyear: take the firms present in
   that quarter, regress dik on equal-weighted IMC and the lagged controls over
   the preceding 20 quarters, firm by firm, and store the IMC coefficient as
   invsens in temp.ivol<index>. This overwrites the temp.ivol<index> datasets
   written by the value-weighted pass. The parameter st is not used.
   NOTE(review): the value-weighted pass earlier in this file regresses ik
   rather than dik - confirm the difference is intentional. */
%macro makebeta(st);
    %do myear = %eval(&begyear) %to %eval(&endyear);

        /* Firms observed in the target quarter. */
        data perms;
            set temp.ik (keep=gvkey yearq);
            if yearq eq %eval(&myear);
        run;

        proc sort data=perms nodupkey;
            by gvkey;
        run;

        /* Estimation window: the 20 quarters before the target quarter. */
        data sub;
            set temp.ik;
            if yearq ge %eval(&myear-20) and yearq le %eval(&myear-1);
        run;

        proc sort data=sub;
            by gvkey;
        run;

        proc sql;
            create table regdata as
                select sub.*
                from perms, work.sub
                where perms.gvkey = sub.gvkey;
        quit;

        proc sort data=regdata;
            by gvkey;
        run;

        proc reg data=regdata outest=temp.ivol&myear noprint;
            model dik = imcqew lbta ltq lcfk lagk / edf;
            by gvkey;
        run;

        /* Discard fits where _edf_ + _p_ is below 8. */
        data temp.ivol&myear;
            set temp.ivol&myear;
            if _edf_ + _p_ < 8 then delete;
            yearq   = %eval(&myear);
            invsens = imcqew;
            keep gvkey yearq invsens;
        run;

    %end;
%mend;
%makebeta(1);

rsubmit;
options source notes errors=5;
%let begyear=25;
%let endyear=152;

/* Concatenate temp.ivol&begyear ... temp.ivol&endyear into temp.invsensew.
   The loop only generates the dataset names for the SET statement; the
   parameter st is not used. */
%macro makebeta(st);
    data temp.invsensew;
        set
        %do myear = %eval(&begyear) %to %eval(&endyear);
            temp.ivol&myear
        %end;
        ;
%mend;
%makebeta(1);

/* Turn the running quarter index back into calendar year and quarter. */
data temp.invsensew;
    set temp.invsensew;
    year = floor((yearq - 0.01)/4) + 1980;
    quar = yearq - (year - 1980)*4;
    drop _type_ _freq_ yearq;
run;

proc sort data=temp.invsensew;
    by gvkey year quar;
run;

proc means data=temp.invsensew;
run;

rsubmit;

/* Link records with a non-missing permno. Link dates become month indices
   (January 1963 = 1); an open-ended link gets index 684. */
data gvkey;
    set crsp.ccmxpf_linktable (keep = gvkey lpermno linkdt linkenddt);
    where nmiss(lpermno) = 0;
    linkstart = (year(linkdt) - 1963)*12 + month(linkdt);
    linkend   = (year(linkenddt) - 1963)*12 + month(linkenddt);
    if nmiss(linkend) then linkend = 684;
run;

/* permno-month skeleton carrying the same month index. */
data cap;
    set crsp.msf (keep = permno cusip date);
    where 1961 < year(date) < 2020;
    year  = year(date);
    month = month(date);
    yearm = (year - 1963)*12 + month;
    drop date;
run;

proc sort data=cap;
    by permno yearm;
run;

/* Match each permno-month to the link whose window contains it
   (start inclusive, end exclusive). */
proc sql;
    create table snpgvkey as
        select gvkey.linkstart, gvkey.linkend, gvkey.gvkey, cap.*
        from cap, gvkey
        where gvkey.linkstart <= cap.yearm < gvkey.linkend
          and cap.permno = gvkey.lpermno;
quit;

/* No DATA= on the next two steps: they act on the most recently created table. */
proc sort nodupkey;
    by gvkey year month;
run;

proc means;
run;

proc print data=snpgvkey (obs=100);
run;

rsubmit;
options source notes errors=5;

/* Annual Compustat data, moved forward one year, joined to the gvkey-permno map. */
data compann;
    set temp.compann2017;
    year = year + 1;
run;

proc sort data=compann;
    by gvkey year;
run;

data compann;
    merge compann snpgvkey;
    by gvkey year;
run;

proc sort data=compann;
    by permno year month;
run;

/* Add the monthly returns and market equity. */
data ivol;
    merge temp.ivol2019 compann;
    by permno year month;
run;

data return;
    set temp.crsp (keep = permno year month return me);
run;

data ivol;
    merge ivol return;
    by permno year month;
    quar = floor((month - 0.01)/3) + 1;
run;

proc sort data=ivol;
    by gvkey year quar;
run;

/* Set invsens to missing outside the 1st-99th percentile of its year-quarter. */
proc sort data=temp.invsensvw;
    by year quar;
run;

proc univariate data=temp.invsensvw noprint;
    var invsens;
    by year quar;
    output out=decile pctlpts=1 99 pctlpre=dec;
run;

data invsensvw;
    merge decile temp.invsensvw;
    by year quar;
    if invsens < dec1 or invsens > dec99 then invsens = .;
    drop dec1 dec99;
run;

proc sort data=invsensvw;
    by gvkey year quar;
run;

data ivol;
    merge ivol invsensvw;
    by gvkey year quar;
run;

proc sort data=ivol;
    by permno year;
run;

/* December price and exchange code, attached to the following year. */
data cap;
    set temp.crsp (keep = permno year month price exchcd);
    where month = 12;
    year = year + 1;
run;

proc sort data=cap;
    by permno year;
run;

data ivol;
    merge cap ivol;
    by permno year;
run;

proc sort data=ivol;
    by year month;
run;

/* Monthly 20/40/60/80 breakpoints of ivol, computed on rows with exchcd=1. */
proc univariate data=ivol noprint;
    where exchcd = 1;
    by year month;
    var ivol;
    output out=decile pctlpts = 20 to 80 by 20 pctlpre=dec;
run;

/* Quintile 1-5 for every row; a value equal to a breakpoint stays in the
   lower group. Missing ivol gives a missing quintile. */
data ivol;
    merge ivol decile;
    by year month;
    if nmiss(ivol) then pdecile = .;
    else if ivol > dec80 then pdecile = 5;
    else if ivol > dec60 then pdecile = 4;
    else if ivol > dec40 then pdecile = 3;
    else if ivol > dec20 then pdecile = 2;
    else pdecile = 1;
    drop dec20 dec40 dec60 dec80;
run;

/* Annual 20/40/60/80 breakpoints of cfp, computed on rows with exchcd=1 and
   positive cfp. */
proc univariate data=ivol noprint;
var cfp; by year; where exchcd=1 and cfp>0;
output out=decile pctlpts = 20 to 80 by 20 pctlpre=dec;
run;

/* cfpdecile: 0 for negative cfp, 1-5 for the quintiles of non-negative cfp,
   missing when cfp is missing. A value equal to a breakpoint stays in the
   lower group. The first bucket now includes cfp = DEC20; previously such a
   row matched neither "cfp < DEC20" nor "cfp > DEC20" and was left missing. */
DATA ivol; MERGE ivol decile; BY year;
  IF cfp < 0 THEN cfpdecile=0;
  IF 0 <= cfp <= DEC20 THEN cfpdecile=1;
  IF cfp > DEC20 THEN cfpdecile=2;
  IF cfp > DEC40 THEN cfpdecile=3;
  IF cfp > DEC60 THEN cfpdecile=4;
  IF cfp > DEC80 THEN cfpdecile=5;
  /* Must stay last: missing cfp also satisfies "cfp < 0" above. */
  if nmiss(cfp) then cfpdecile=.;
drop dec20 dec40 dec60 dec80;
run;

proc sort data=ivol;
    by cusip year month;
run;

/* Previous row's market equity and ivol quintile, blanked whenever the
   previous row belongs to a different cusip. */
data ivol;
    set ivol;
    lme        = lag(me);
    lcusip     = lag(cusip);
    ivoldecile = lag(pdecile);
    if lcusip ne cusip then do;
        ivoldecile = .;
        lme        = .;
    end;
run;

proc means data=ivol nolabels;
run;

rsubmit;
options nosource nonotes errors=0;

/* Mean invsens in every (cfpdecile, ivoldecile) cell, month by month. */
proc sort data=ivol;
    by year month cfpdecile ivoldecile;
run;

proc means data=ivol noprint;
    var invsens;
    by year month cfpdecile ivoldecile;
    output out=factor mean(invsens) = invsens;
run;

/* One dataset per cell: factor<i><j> holds invsens<i><j> for ivoldecile=i
   (1-5) and cfpdecile=j (0-5). The parameter many is not used. */
%macro a(many);
    %do i = 1 %to 5;
        %do j = 0 %to 5;
            data factor&i&j;
                set factor;
                where ivoldecile = &i and cfpdecile = &j;
                invsens&i&j = invsens;
                drop invsens ivoldecile cfpdecile;
            run;
        %end;
    %end;
%mend a;
%a(1);

/* Cells side by side: one row per year/month, restricted to 1986-2017. */
data temp.factor;
    merge factor10 factor11 factor12 factor13 factor14 factor15
          factor20 factor21 factor22 factor23 factor24 factor25
          factor30 factor31 factor32 factor33 factor34 factor35
          factor40 factor41 factor42 factor43 factor44 factor45
          factor50 factor51 factor52 factor53 factor54 factor55;
    by year month;
    if nmiss(month) then delete;
    if year < 1986 or year > 2017 then delete;
    drop _freq_ _type_;
run;

proc means data=temp.factor;
run;

/* Bring the table to the local session. */
proc download data=temp.factor
    out=sasuser.factor25 (replace=yes);
run;
endrsubmit;

/* Write it to the workbook sheet for the cell means. */
PROC EXPORT DATA= SASUSER.FACTOR25
    OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2021.xls"
    DBMS=EXCEL REPLACE;
    SHEET="invsenscfpmean";
RUN;
rsubmit;
options nosource nonotes errors=0;

/* Median invsens in every (cfpdecile, ivoldecile) cell, month by month. */
proc sort data=ivol;
    by year month cfpdecile ivoldecile;
run;

proc means data=ivol noprint;
    var invsens;
    by year month cfpdecile ivoldecile;
    output out=factor median(invsens) = invsens;
run;

/* One dataset per cell: factor<i><j> holds invsens<i><j> for ivoldecile=i
   (1-5) and cfpdecile=j (0-5). The parameter many is not used. */
%macro a(many);
    %do i = 1 %to 5;
        %do j = 0 %to 5;
            data factor&i&j;
                set factor;
                where ivoldecile = &i and cfpdecile = &j;
                invsens&i&j = invsens;
                drop invsens ivoldecile cfpdecile;
            run;
        %end;
    %end;
%mend a;
%a(1);

/* Cells side by side: one row per year/month, restricted to 1986-2017. */
data temp.factor;
    merge factor10 factor11 factor12 factor13 factor14 factor15
          factor20 factor21 factor22 factor23 factor24 factor25
          factor30 factor31 factor32 factor33 factor34 factor35
          factor40 factor41 factor42 factor43 factor44 factor45
          factor50 factor51 factor52 factor53 factor54 factor55;
    by year month;
    if nmiss(month) then delete;
    if year < 1986 or year > 2017 then delete;
    drop _freq_ _type_;
run;

proc means data=temp.factor;
run;

/* Bring the table to the local session. */
proc download data=temp.factor
    out=sasuser.factor25 (replace=yes);
run;
endrsubmit;

/* Write it to the workbook sheet for the cell medians. */
PROC EXPORT DATA= SASUSER.FACTOR25
    OUTFILE= "C:\Sasha\Idiosyncratic Volatility\2021.xls"
    DBMS=EXCEL REPLACE;
    SHEET="invsenscfpmed";
RUN;

/* Scratch copy of the 1st/99th percentile trimming of invsens. It was stored
   as a bare quoted string, which is not a valid SAS statement; it is kept here
   as a comment so the file can be submitted as a whole. Not executed.

proc sort data=temp.invsensvw; by year quar;
proc univariate data=temp.invsensvw noprint; var invsens; by year quar;
output out=decile pctlpts=1 99 pctlpre=dec;
data invsensvw; merge decile temp.invsensvw; by year quar;
data invsensvw; set invsensvw; if invsens<dec1 or invsens>dec99
then invsens=.; drop dec1 dec99; proc sort; by gvkey year quar;
*/

